# Load the required libraries
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("lubridate"))
suppressPackageStartupMessages(library("plotly"))
suppressPackageStartupMessages(library("ggplot2"))
library(lubridate)
library(plotly)
library(dplyr)
library(ggplot2)
# Download the archived "COVID-19 Vaccine Doses Given to San Franciscans by
# Demographics Over Time" CSV from GitHub (read.csv already defaults to a
# comma separator), then preview the first rows.
vaccine <- read.csv(
  file = "https://github.com/ayeayeronn/Covid-19-Vaccination/blob/main/ARCHIVED__COVID-19_Vaccine_Doses_Given_to_San_Franciscans_by_Demographics_Over_Time.csv?raw=true"
)
head(vaccine)
## DATE_ADMINISTERED ADMINISTERING_PROVIDER_TYPE NEW_1ST_DOSES NEW_2ND_DOSES
## 1 2021/10/17 All Providers 15 17
## 2 2021/10/18 All Providers 24 40
## 3 2021/10/19 All Providers 39 30
## 4 2021/10/20 All Providers 28 41
## 5 2021/10/21 All Providers 31 40
## 6 2021/10/22 All Providers 27 40
## NEW_SINGLE_DOSES NEW_SERIES_COMPLETED NEW_RECIPIENTS CUMULATIVE_1ST_DOSES
## 1 0 17 15 52246
## 2 5 45 29 52270
## 3 1 31 40 52309
## 4 3 44 31 52337
## 5 8 48 40 52368
## 6 5 45 32 52395
## CUMULATIVE_2ND_DOSES CUMULATIVE_SINGLE_DOSES CUMULATIVE_SERIES_COMPLETED
## 1 47994 4478 52472
## 2 48034 4483 52517
## 3 48064 4484 52548
## 4 48105 4487 52592
## 5 48145 4495 52640
## 6 48185 4500 52685
## CUMULATIVE_RECIPIENTS OVERALL_SEGMENT
## 1 56766 Ages 12+ by Age Bracket, Administered by All Providers
## 2 56795 Ages 12+ by Age Bracket, Administered by All Providers
## 3 56835 Ages 12+ by Age Bracket, Administered by All Providers
## 4 56866 Ages 12+ by Age Bracket, Administered by All Providers
## 5 56906 Ages 12+ by Age Bracket, Administered by All Providers
## 6 56938 Ages 12+ by Age Bracket, Administered by All Providers
## AGE_GROUP DEMOGRAPHIC_GROUP DEMOGRAPHIC_SUBGROUP SUBGROUP_POPULATION
## 1 12+ Age Bracket 18-24 62127
## 2 12+ Age Bracket 18-24 62127
## 3 12+ Age Bracket 18-24 62127
## 4 12+ Age Bracket 18-24 62127
## 5 12+ Age Bracket 18-24 62127
## 6 12+ Age Bracket 18-24 62127
## AGE_GROUP_POPULATION DEMOGRAPHIC_SUBGROUP_SORT_ORDER NEW_BOOSTER_DOSES
## 1 791131 4 15
## 2 791131 4 41
## 3 791131 4 48
## 4 791131 4 29
## 5 791131 4 45
## 6 791131 4 71
## NEW_BOOSTER_RECIPIENTS CUMULATIVE_BOOSTER_DOSES CUMULATIVE_BOOSTER_RECIPIENTS
## 1 15 880 871
## 2 41 921 912
## 3 48 969 960
## 4 29 998 989
## 5 45 1043 1034
## 6 71 1114 1105
## NEW_2ND_BOOSTER_RECIPIENTS CUMULATIVE_2ND_BOOSTER_RECIPIENTS
## 1 0 4
## 2 0 4
## 3 0 4
## 4 0 4
## 5 0 4
## 6 0 4
## DATA_AS_OF DATA_UPDATED_AT DATA_LOADED_AT
## 1 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 2 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 3 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 4 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 5 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
## 6 10/27/2022 06:00:10 AM 10/27/2022 08:10:34 AM 10/27/2022 10:15:08 AM
# Report the number of rows and columns in the freshly loaded data.
dim(vaccine)
## [1] 93576 28
# DATE_ADMINISTERED is read as text in year/month/day order; convert it to a
# Date column with lubridate::ymd() and confirm the resulting class.
vaccine$DATE_ADMINISTERED <- ymd(vaccine$DATE_ADMINISTERED)
class(vaccine$DATE_ADMINISTERED)
## [1] "Date"
Number of first doses administered before June 15, 2021
# Sum NEW_1ST_DOSES over all rows dated strictly before 2021-06-15.
# NOTE(review): the sum runs over every row of `vaccine`, which holds several
# demographic groupings of the same doses; the resulting total is far larger
# than the 12+ population shown in the preview -- confirm whether the data
# should first be restricted to a single grouping/segment.
vaccine %>%
  filter(DATE_ADMINISTERED < as.Date("2021-06-15")) %>%
  summarise(
    num_new_doses_before_june15 = sum(NEW_1ST_DOSES)
  )
## num_new_doses_before_june15
## 1 5188428
Number of first doses administered after June 15, 2021
# Sum NEW_1ST_DOSES over all rows dated strictly after 2021-06-15
# (doses recorded on June 15 itself are not part of this total).
vaccine %>%
  filter(DATE_ADMINISTERED > as.Date("2021-06-15")) %>%
  summarise(
    num_new_doses_after_june15 = sum(NEW_1ST_DOSES)
  )
## num_new_doses_after_june15
## 1 1119615
Number of first doses administered as of September 12, 2021 (inclusive)
# Sum NEW_1ST_DOSES over all rows dated on or before 2021-09-12.
vaccine %>%
  filter(DATE_ADMINISTERED <= as.Date("2021-09-12")) %>%
  summarise(
    num_new_doses_of_sep12 = sum(NEW_1ST_DOSES)
  )
## num_new_doses_of_sep12
## 1 5555787
Number of single, first, and second doses administered before June 15, 2021
# Totals of single-dose, first-dose and second-dose counts over all rows dated
# strictly before 2021-06-15, returned as one row with three columns.
vaccine %>%
  filter(DATE_ADMINISTERED < as.Date("2021-06-15")) %>%
  summarise(
    total_single_dose_before_June15 = sum(NEW_SINGLE_DOSES),
    num_new_doses_before_june15 = sum(NEW_1ST_DOSES),
    num_2nd_doses_before_June15 = sum(NEW_2ND_DOSES)
  )
## total_single_dose_before_June15 num_new_doses_before_june15
## 1 372574 5188428
## num_2nd_doses_before_June15
## 1 4822264
Number of single, first, and second doses administered after June 15, 2021
# Totals of single-dose, first-dose and second-dose counts over all rows dated
# strictly after 2021-06-15, returned as one row with three columns.
vaccine %>%
  filter(DATE_ADMINISTERED > as.Date("2021-06-15")) %>%
  summarise(
    total_single_dose_after_June15 = sum(NEW_SINGLE_DOSES),
    num_new_doses_after_june15 = sum(NEW_1ST_DOSES),
    num_2nd_doses_after_june15 = sum(NEW_2ND_DOSES)
  )
## total_single_dose_after_June15 num_new_doses_after_june15
## 1 91336 1119615
## num_2nd_doses_after_june15
## 1 1007085
Number of single, first, and second doses administered as of September 12, 2021 (inclusive)
# Totals of single-dose, first-dose and second-dose counts "as of" 2021-09-12.
# The bound is inclusive (<=) so that doses given on September 12 itself are
# counted, matching the first-dose "as of September 12" total computed with
# `<=` earlier in this analysis; the previous exclusive `<` silently dropped
# that day. Any figures recorded from a run with the exclusive bound must be
# regenerated.
vaccine %>%
  filter(DATE_ADMINISTERED <= as.Date("2021-09-12")) %>%
  summarise(
    num_single_dose_of_Sep12 = sum(NEW_SINGLE_DOSES),
    num_new_doses_Of_Sep122021 = sum(NEW_1ST_DOSES),
    num_2nd_doses_of_Sep12 = sum(NEW_2ND_DOSES)
  )
## num_single_dose_of_Sep12 num_new_doses_Of_Sep122021 num_2nd_doses_of_Sep12
## 1 430311 5554580 5229592
# Keep only rows dated before 2021-09-18, then record the month number
# (1-12, without the year) of each administration date in a MONTH column.
vaccine <- vaccine %>%
  filter(DATE_ADMINISTERED < as.Date("2021-09-18"))
vaccine <- vaccine %>%
  mutate(MONTH = month(DATE_ADMINISTERED))
December and January have a low number of vaccinations because only healthcare and essential workers had access to the vaccine. March and April have the highest number of vaccinations, followed by May and February, because the vaccine was rolled out on a tiered system. In the later months, the number of new vaccinations appears to reach a plateau.
# Total vaccinations (first + second + single doses) per calendar month,
# listed from the busiest month to the quietest. The numeric MONTH is turned
# into an abbreviated month label before grouping on it.
vaccine %>%
  mutate(MONTH = month(MONTH, label = TRUE)) %>%
  group_by(MONTH) %>%
  summarise(
    num_of_vaccinations =
      sum(NEW_1ST_DOSES) + sum(NEW_2ND_DOSES) + sum(NEW_SINGLE_DOSES)
  ) %>%
  arrange(desc(num_of_vaccinations))
## # A tibble: 10 × 2
## MONTH num_of_vaccinations
## <ord> <int>
## 1 Apr 2960163
## 2 Mar 2857746
## 3 May 1676516
## 4 Feb 1671916
## 5 Jan 749954
## 6 Jun 558723
## 7 Aug 276478
## 8 Jul 266207
## 9 Sep 125839
## 10 Dec 114068
# List the distinct subgroup names that belong to the "Race/Ethnicity"
# demographic group.
vaccine %>%
  filter(DEMOGRAPHIC_GROUP == "Race/Ethnicity") %>%
  select(DEMOGRAPHIC_GROUP, DEMOGRAPHIC_SUBGROUP) %>%
  unique()
## DEMOGRAPHIC_GROUP DEMOGRAPHIC_SUBGROUP
## 1 Race/Ethnicity Asian
## 278 Race/Ethnicity Black or African American
## 554 Race/Ethnicity Native Hawaiian or Other Pacific Islander
## 790 Race/Ethnicity Hispanic or Latino/a, all races
## 1103 Race/Ethnicity Other Race
## 1170 Race/Ethnicity American Indian or Alaska Native
## 1588 Race/Ethnicity Unknown
## 1864 Race/Ethnicity Multi-Racial
## 2642 Race/Ethnicity White
# Add a RACE column: rows whose subgroup is one of the race/ethnicity names
# keep that name, every other row is marked "N/A".
# NOTE(review): the match is on the subgroup name only; if "Unknown" also
# occurs under another DEMOGRAPHIC_GROUP those rows would be tagged as a race
# too -- confirm against the data.
vaccine <- vaccine %>%
  mutate(
    RACE = ifelse(
      DEMOGRAPHIC_SUBGROUP %in% c(
        "American Indian or Alaska Native",
        "Native Hawaiian or Other Pacific Islander",
        "Asian",
        "Hispanic or Latino/a, all races",
        "Multi-Racial",
        "Other Race",
        "White",
        "Black or African American",
        "Unknown"
      ),
      DEMOGRAPHIC_SUBGROUP,
      "N/A"
    )
  )
# Line chart of daily first doses, one coloured line per race/ethnicity.
# Rows without a race label ("N/A") are dropped before aggregating.
vaccine %>%
  filter(RACE != "N/A") %>%
  group_by(RACE, DATE_ADMINISTERED) %>%
  summarise(dose = sum(NEW_1ST_DOSES)) %>%
  ggplot(aes(x = DATE_ADMINISTERED, y = dose, color = RACE)) +
  geom_line() +
  theme(legend.position = c(0.65, 0.6)) +
  labs(
    title = "Number of Covid Vaccines by Race",
    x = "Date Administered",
    y = "Total number of Doses"
  )
## `summarise()` has grouped output by 'RACE'. You can override using the
## `.groups` argument.
# Convert the most recent ggplot into an interactive plotly chart.
# The size is passed to ggplotly() directly: specifying width/height through
# layout() is deprecated by plotly and emitted a warning.
ggplotly(width = 1700, height = 600)
suppressPackageStartupMessages(library("reshape"))
library(reshape)
# Keep the administration date plus the three daily dose-count columns.
combined <- select(
  vaccine,
  DATE_ADMINISTERED, NEW_1ST_DOSES, NEW_2ND_DOSES, NEW_SINGLE_DOSES
)
# Flatten every column in place (leaves plain vector columns as they are),
# then inspect the resulting structure.
combined[] <- lapply(combined, unlist)
str(combined)
## 'data.frame': 36683 obs. of 4 variables:
## $ DATE_ADMINISTERED: Date, format: "2021-03-16" "2021-03-17" ...
## $ NEW_1ST_DOSES : int 64 66 32 34 49 26 51 52 60 59 ...
## $ NEW_2ND_DOSES : int 3 3 8 10 10 6 11 11 9 24 ...
## $ NEW_SINGLE_DOSES : int 0 0 0 0 0 0 0 1 0 0 ...
# Reshape to long format: one row per date and dose type, with the dose-type
# name in `variable` and the count in `value`.
combined <- melt(combined, id.vars = "DATE_ADMINISTERED")
# Line chart of daily dose totals, one coloured line per dose type
# (first, second, single), with readable legend labels.
combined %>%
  group_by(DATE_ADMINISTERED, variable) %>%
  summarise(num_dose = sum(value)) %>%
  ggplot(aes(x = DATE_ADMINISTERED, y = num_dose, color = variable)) +
  geom_line() +
  labs(
    title = "Number of Covid Vaccines by Dose",
    x = "Date Administered",
    y = "Total Number of Doses"
  ) +
  scale_color_discrete(
    name = "Type of Dose",
    labels = c("New 1st Doses", "New 2nd Doses", "New Single Doses")
  )
## `summarise()` has grouped output by 'DATE_ADMINISTERED'. You can override using
## the `.groups` argument.
# Convert the most recent ggplot into an interactive plotly chart.
# The size is passed to ggplotly() directly: specifying width/height through
# layout() is deprecated by plotly and emitted a warning.
ggplotly(width = 1700, height = 600)